library(jsonlite)
library(stringr)
library(tidytext)
library(tidyverse)
## -- Attaching packages -------------------------------------------- tidyverse 1.2.1 --
## v ggplot2 3.1.0 v readr 1.3.0
## v tibble 1.4.2 v purrr 0.2.5
## v tidyr 0.8.2 v dplyr 0.7.8
## v ggplot2 3.1.0 v forcats 0.3.0
## -- Conflicts ----------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x purrr::flatten() masks jsonlite::flatten()
## x dplyr::lag() masks stats::lag()
library(wordcloud)
## Loading required package: RColorBrewer
library(DT)
library(lubridate)
##
## Attaching package: 'lubridate'
## The following object is masked from 'package:base':
##
## date
library(reshape2)
##
## Attaching package: 'reshape2'
## The following object is masked from 'package:tidyr':
##
## smiths
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
# Load the raw TED talks data.
# NOTE(review): the path below is machine-specific; point it at the local copy
# of ted.csv before running.
# stringsAsFactors = FALSE keeps text columns as character vectors on every R
# version (R < 4.0 would otherwise convert them to factors).
ted <- read.csv(
  "C:/Users/Mr.Perfectionist/Downloads/DAV/ted.csv",
  stringsAsFactors = FALSE
)
# Interactive preview of the raw data, with a column-visibility button
datatable(
  ted,
  extensions = "Buttons",
  options = list(dom = "Bfrtip", buttons = I("colvis"))
)
## Warning in instance$preRenderHook(instance): It seems your data is too
## big for client-side DataTables. You may consider server-side processing:
## https://rstudio.github.io/DT/server.html
# Keep only the columns that the rest of the analysis uses
keep_cols <- c(
  "comments", "duration", "languages", "main_speaker", "num_speaker",
  "published_date", "ratings", "speaker_occupation", "tags", "views", "title"
)
ted <- ted[, keep_cols]
# Column names of the reduced table
names(ted)
## [1] "comments" "duration" "languages"
## [4] "main_speaker" "num_speaker" "published_date"
## [7] "ratings" "speaker_occupation" "tags"
## [10] "views" "title"
# Number of rows and columns
c(nrow(ted), ncol(ted))
## [1] 2550 11
# Total count of missing values in the table
sum(is.na(ted))
## [1] 0
# Missing values per column
colSums(is.na(ted))
## comments duration languages
## 0 0 0
## main_speaker num_speaker published_date
## 0 0 0
## ratings speaker_occupation tags
## 0 0 0
## views title
## 0 0
# Inspect the numerical variables for outliers (box plots on a log10 y axis)
ggplot(ted, aes(x = "", y = comments)) +
  geom_boxplot() +
  scale_y_log10(labels = scales::comma) +
  labs(title = "No. of Comments On Them") +
  theme_minimal()

ggplot(ted, aes(x = "", y = views)) +
  geom_boxplot() +
  scale_y_log10(labels = scales::comma) +
  labs(title = "Number of Views") +
  theme_minimal()

# Base-graphics overview of the remaining numeric columns, drawn side by side.
# par() returns the previous settings; they are restored afterwards so the
# three-panel layout does not leak into later base plots.
old_par <- par(mfrow = c(1, 3))
hist(ted$num_speaker)
boxplot(ted$languages, main = "No. of languages")
boxplot(ted$duration, main = "Duration (in seconds)")
par(old_par)

# Convert the published date (seconds since 1970-01-01, GMT) to a Date and
# derive month and year columns from it. The POSIXct value is converted
# directly instead of being formatted to text and re-parsed, which depended on
# how the timestamps happened to print.
ted$published_date <- as.Date(
  as.POSIXct(as.numeric(ted$published_date), origin = "1970-01-01", tz = "GMT"),
  tz = "GMT"
)
ted$published_month <- factor(month.abb[month(ted$published_date)])
ted$published_year <- year(ted$published_date)
# Primary key for the dataset: serialno identifies each talk by its row
# position. seq_len() also copes with an empty table.
ted$serialno <- seq_len(nrow(ted))
# Expand the JSON-like `ratings` string of every talk into one row per rating.
# The strings use single quotes, so these are swapped for double quotes before
# parsing. Each parsed table is tagged with the talk's row index, which is the
# same value as ted$serialno.
parse_talk_ratings <- function(i) {
  talk_ratings <- fromJSON(str_replace_all(ted$ratings[i], "'", '"'))
  talk_ratings$serialno <- i
  talk_ratings
}
# Parse every row (no hard-coded row count) and bind once instead of growing a
# data frame inside a loop. rev() keeps the row order the loop used to produce
# (last talk first).
rating_list <- lapply(seq_len(nrow(ted)), parse_talk_ratings)
df1 <- bind_rows(rev(rating_list))
#Creating a table to match with the ratings
ted_ratings <- df1
# Checking the types of rating across all talks, not just the last one parsed
df1 %>% distinct(name)
## name
## 1 Unconvincing
## 2 Informative
## 3 Inspiring
## 4 OK
## 5 Fascinating
## 6 Ingenious
## 7 Confusing
## 8 Obnoxious
## 9 Beautiful
## 10 Longwinded
## 11 Persuasive
## 12 Jaw-dropping
## 13 Courageous
## 14 Funny
# Split the distinct rating names into negative and positive sets; 'OK' is the
# only neutral rating and anything unrecognised is labelled ' '.
negative_words <- c('Unconvincing', 'Confusing', 'Obnoxious', 'Longwinded')
positive_words <- c(
  'Informative', 'Inspiring', 'Fascinating', 'Ingenious', 'Beautiful',
  'Persuasive', 'Jaw-dropping', 'Courageous', 'Funny'
)
is_negative <- df1$name %in% negative_words
is_positive <- df1$name %in% positive_words
df1$ratings_type <- ifelse(
  is_negative,
  'negative_ratings',
  ifelse(
    is_positive,
    'positive_ratings',
    ifelse(df1$name == 'OK', 'neutral_ratings', ' ')
  )
)
# Total the rating counts per talk and rating type, put one rating type per
# column, and attach the result to the talk-level table.
rating_totals <- df1 %>%
  group_by(serialno, ratings_type) %>%
  summarise(count_rating_type = sum(count)) %>%
  spread(ratings_type, count_rating_type) %>%
  ungroup()
ted <- inner_join(rating_totals, ted, by = "serialno")
# Final dataset: selected columns plus the total number of ratings per talk
ted_final <- ted %>%
  select(
    serialno, main_speaker, title, num_speaker, comments,
    positive_ratings, negative_ratings, neutral_ratings,
    duration, languages, speaker_occupation, views,
    published_month, published_year, published_date
  ) %>%
  mutate(ratings = positive_ratings + negative_ratings + neutral_ratings)
#ANALYSIS
# Number of talks published per year, with the across-year average shown as a
# dashed reference line.
talks_per_year <- ted_final %>%
  group_by(published_year) %>%
  summarise(n = n())
# The average is computed from the data so the label always matches the line
avg_talks <- mean(talks_per_year$n)
ggplot(talks_per_year, aes(x = factor(published_year), y = n, group = 1)) +
  geom_line(color = "Blue") +
  # `size` is the point-size parameter; `lwd` is a base-graphics name
  geom_point(size = 2, color = "blue") +
  labs(title = "Number of Talks over the years", x = "Published Year", y = "# of Talks") +
  # A constant yintercept draws the reference line once, not once per row
  geom_hline(yintercept = avg_talks, linetype = "dashed", alpha = .5) +
  # Label sits just below the reference line
  annotate(
    "text", x = '2007', y = avg_talks,
    label = paste("Average:", round(avg_talks, 1)),
    size = 3, vjust = 1.5
  ) +
  theme_minimal()

# Average number of views (in hundred thousands) per published year, with the
# mean of the yearly averages shown as a dashed reference line.
views_per_year <- ted_final %>%
  group_by(published_year) %>%
  summarise(avg_views = mean(views / 100000))
# Both annotations are derived from the plotted data, not typed in by hand
mean_avg_views <- mean(views_per_year$avg_views)
max_avg_views <- max(views_per_year$avg_views)
ggplot(views_per_year, aes(x = factor(published_year), y = avg_views, group = 1)) +
  geom_line(color = "red") +
  # `size` is the point-size parameter; `lwd` is a base-graphics name
  geom_point(size = 2, color = "red") +
  labs(title = "Views by Published Year", x = "Published Year" , y = "Average # of views(in hundred thousands)") +
  # A constant yintercept draws the reference line once, not once per row
  geom_hline(yintercept = mean_avg_views, linetype = "dashed", alpha = .5) +
  # Labels are converted back to raw view counts and placed just above the
  # value they describe
  annotate(
    "text", x = '2007', y = mean_avg_views,
    label = paste("Average:", scales::comma(round(mean_avg_views * 100000))),
    size = 3, vjust = -0.8
  ) +
  annotate(
    "text", x = '2007', y = max_avg_views,
    label = paste("Max:", scales::comma(round(max_avg_views * 100000))),
    size = 3, vjust = -0.8
  ) +
  theme_minimal()

# Average number of comments per published year: one dot per year on a
# flipped axis, with a dashed guide spanning the observed range of averages.
ted_final %>%
  group_by(published_year1 = as.factor(published_year)) %>%
  summarise(avg_comments = mean(comments)) %>%
  ggplot(aes(x = published_year1, y = avg_comments)) +
  geom_point(colour = "tomato2", size = 3) +
  geom_segment(
    aes(
      x = published_year1,
      xend = published_year1,
      y = min(avg_comments),
      yend = max(avg_comments)
    ),
    linetype = "dashed",
    size = 0.05
  ) +
  coord_flip() +
  labs(
    title = "Number of Comments by Published Year",
    x = "Published year",
    y = "Average # of Comments"
  ) +
  theme_minimal()

# Analysing the average number of ratings per published year.
# This plot shows ratings, so the title says "Ratings" (it previously repeated
# the title of the comments plot).
ted_final %>%
  mutate(published_year1 = as.factor(published_year)) %>%
  group_by(published_year1) %>%
  summarise(avg_ratings = mean(ratings)) %>%
  ggplot(aes(x = published_year1, y = avg_ratings)) +
  geom_point(col = "tomato2", size = 3) +
  # Dashed guide per year spanning the observed range of yearly averages
  geom_segment(
    aes(x = published_year1, xend = published_year1, y = min(avg_ratings), yend = max(avg_ratings)),
    linetype = "dashed", size = 0.05
  ) +
  coord_flip() +
  labs(title = "Number of Ratings by Published Year", x = "Published year", y = "Average # of Ratings") +
  theme_minimal()

# Share of positive, negative and neutral ratings per published year, shown
# as a stacked bar chart.
ted_final %>%
  group_by(published_year) %>%
  summarise(
    Perc_Positive_Ratings = sum(positive_ratings) / sum(ratings),
    Perc_Negative_Ratings = sum(negative_ratings) / sum(ratings),
    Perc_Neutral_Ratings = sum(neutral_ratings) / sum(ratings)
  ) %>%
  # Long format: one row per (year, rating type) so the bars can be stacked
  gather(key = Type, value = Perc_rating, -published_year) %>%
  ggplot(aes(x = published_year, y = Perc_rating, fill = Type)) +
  geom_col() +
  labs(
    title = "Percentage of Positive, Negative and Neutral Ratings by Published Year",
    x = "Published year",
    y = "% of Ratings"
  ) +
  scale_y_continuous(labels = scales::percent) +
  theme_minimal()

# Top 10 talks by number of views, shown as an interactive table
ted_final %>%
  arrange(desc(views)) %>%
  select(title, main_speaker, views, published_date, comments, ratings) %>%
  head(10) %>%
  datatable()
# Define the views category: talks are bucketed by the 20th/40th/60th/80th
# percentiles of views. The percentiles are computed once and looked up with
# findInterval(); left.open = TRUE makes every bucket right-closed, so a talk
# sitting exactly on a boundary falls in the lower category, as before.
view_breaks <- quantile(
  ted_final$views,
  probs = c(0.20, 0.40, 0.60, 0.80),
  names = FALSE
)
view_labels <- c('Worst', 'Bad', 'Ok', 'Good', 'Best')
# findInterval() returns 0 (at or below the 20th percentile) up to 4 (above
# the 80th), so + 1L turns it into an index into view_labels
ted_final$view_category <- view_labels[
  findInterval(ted_final$views, view_breaks, left.open = TRUE) + 1L
]
# Order the factor levels from best to worst
vcat_order <- c('Best','Good','Ok','Bad','Worst')
ted_final$view_category <- factor(ted_final$view_category, levels = vcat_order)
# Range of views covered by each category
view_cat <- ted_final %>%
  group_by(view_category) %>%
  summarise(Min_Views = min(views), Max_Views = max(views)) %>%
  arrange(desc(Min_Views))
datatable(view_cat)
## DOES THE PUBLISHING MONTH AFFECT THE TALKS?
# Order the month levels chronologically (Jan..Dec) instead of alphabetically
month_order <- month.abb
ted_final$published_month <- factor(ted_final$published_month, levels = month_order)
# Share of each year's total views contributed by each publishing month, for
# talks published from 2010 onwards (one facet per year)
ted_final %>%
  filter(published_year >= 2010) %>%
  group_by(published_year, published_month) %>%
  summarise(m_views = sum(views)) %>%
  # The yearly total is the sum of that year's monthly totals, so no second
  # aggregation and self-join is needed
  group_by(published_year) %>%
  mutate(perc_views = m_views / sum(m_views)) %>%
  ungroup() %>%
  ggplot(aes(x = published_month, y = perc_views, group = 1, color = published_year)) +
  geom_point() +
  geom_line() +
  facet_wrap(~published_year, ncol = 1) +
  scale_y_continuous(labels = scales::percent) +
  labs(
    x = "Published Month",
    y = "Percent Contribution in Yearly views",
    title = "Monthly percentage views over years 2010-2017 - Seasonality"
  ) +
  theme_minimal()

##DOES THE OCCUPATION OF THE SPEAKER AFFECT THE TALKS?
# Draws a word cloud of speaker occupations for one view category and returns
# a lollipop chart of the 10 most frequent occupations in that category.
#
# Arguments:
#   v_cat    - view category to analyse; compared against
#              ted_final$view_category (e.g. "Best", "Worst").
#   wc_scale - two sizes (largest, smallest word) passed to wordcloud() as
#              `scale`. The default equals wordcloud's own default; pass a
#              smaller pair, e.g. c(2, 0.3), if many occupations trigger
#              "could not be fit on page" warnings.
#
# Reads the global `ted_final`. The word cloud is drawn as a side effect; the
# ggplot object is the return value and is printed when the call is made at
# top level.
generate_cloud_grph <- function(v_cat, wc_scale = c(4, 0.5)) {
  # Occupation frequencies within the category, most frequent first. Counted
  # once and reused for both the word cloud and the chart.
  occupation_counts <- ted_final %>%
    filter(view_category == v_cat) %>%
    count(speaker_occupation, sort = TRUE) %>%
    as.data.frame()

  wordcloud(
    words = occupation_counts$speaker_occupation,
    freq = occupation_counts$n,
    scale = wc_scale,
    min.freq = 1,
    max.words = 100,
    random.order = FALSE,
    rot.per = 0.35,
    colors = brewer.pal(8, "Dark2")
  )

  occupation_counts %>%
    head(10) %>%
    ggplot(aes(x = reorder(speaker_occupation, n), y = n, label = n)) +
    geom_point(size = 6) +
    geom_segment(aes(
      x = speaker_occupation,
      xend = speaker_occupation,
      y = 0,
      yend = n
    )) +
    geom_text(color = "white", size = 3) +
    coord_flip() +
    # labs() names aesthetics, not screen positions: x is the occupation and
    # y is its frequency, even though coord_flip() swaps where they are drawn
    labs(x = "Speaker Occupation", y = "Frequency") +
    theme_classic()
}
generate_cloud_grph("Best")
## Warning in wordcloud(words = df_wc$speaker_occupation, freq = df_wc$n,
## min.freq = 1, : Global health expert; data visionary could not be fit on
## page. It will not be plotted.
## Warning in wordcloud(words = df_wc$speaker_occupation, freq = df_wc$n,
## min.freq = 1, : Actor and activist could not be fit on page. It will not be
## plotted.
## Warning in wordcloud(words = df_wc$speaker_occupation, freq = df_wc$n,
## min.freq = 1, : Cognitive neuroscientist could not be fit on page. It will
## not be plotted.
## Warning in wordcloud(words = df_wc$speaker_occupation, freq = df_wc$n,
## min.freq = 1, : Philosopher, cognitive scientist could not be fit on page.
## It will not be plotted.
## Warning in wordcloud(words = df_wc$speaker_occupation, freq = df_wc$n,
## min.freq = 1, : Psychologist; happiness expert could not be fit on page. It
## will not be plotted.
## Warning in wordcloud(words = df_wc$speaker_occupation, freq = df_wc$n,
## min.freq = 1, : Actor, comedian, playwright could not be fit on page. It
## will not be plotted.
## Warning in wordcloud(words = df_wc$speaker_occupation, freq = df_wc$n,
## min.freq = 1, : Anthropologist, expert on love could not be fit on page. It
## will not be plotted.
## Warning in wordcloud(words = df_wc$speaker_occupation, freq = df_wc$n,
## min.freq = 1, : Author, entrepreneur could not be fit on page. It will not
## be plotted.
## Warning in wordcloud(words = df_wc$speaker_occupation, freq = df_wc$n,
## min.freq = 1, : Autonomous systems pioneer could not be fit on page. It
## will not be plotted.
## Warning in wordcloud(words = df_wc$speaker_occupation, freq = df_wc$n,
## min.freq = 1, : Cognitive scientist could not be fit on page. It will not
## be plotted.
## Warning in wordcloud(words = df_wc$speaker_occupation, freq = df_wc$n,
## min.freq = 1, : Comedian and writer could not be fit on page. It will not
## be plotted.
## Warning in wordcloud(words = df_wc$speaker_occupation, freq = df_wc$n,
## min.freq = 1, : Computer scientist could not be fit on page. It will not be
## plotted.
## Warning in wordcloud(words = df_wc$speaker_occupation, freq = df_wc$n,
## min.freq = 1, : Education activist could not be fit on page. It will not be
## plotted.
## Warning in wordcloud(words = df_wc$speaker_occupation, freq = df_wc$n,
## min.freq = 1, : Education researcher could not be fit on page. It will not
## be plotted.
## Warning in wordcloud(words = df_wc$speaker_occupation, freq = df_wc$n,
## min.freq = 1, : Educator and social entrepreneur could not be fit on page.
## It will not be plotted.
## Warning in wordcloud(words = df_wc$speaker_occupation, freq = df_wc$n,
## min.freq = 1, : Evolutionary biologist could not be fit on page. It will
## not be plotted.
## Warning in wordcloud(words = df_wc$speaker_occupation, freq = df_wc$n,
## min.freq = 1, : Game Designer could not be fit on page. It will not be
## plotted.
## Warning in wordcloud(words = df_wc$speaker_occupation, freq = df_wc$n,
## min.freq = 1, : Global author could not be fit on page. It will not be
## plotted.
## Warning in wordcloud(words = df_wc$speaker_occupation, freq = df_wc$n,
## min.freq = 1, : Humorist, web artist could not be fit on page. It will not
## be plotted.
## Warning in wordcloud(words = df_wc$speaker_occupation, freq = df_wc$n,
## min.freq = 1, : Investor, human guinea pig, author could not be fit on
## page. It will not be plotted.
## Warning in wordcloud(words = df_wc$speaker_occupation, freq = df_wc$n,
## min.freq = 1, : Management thinker could not be fit on page. It will not be
## plotted.
## Warning in wordcloud(words = df_wc$speaker_occupation, freq = df_wc$n,
## min.freq = 1, : Marketer and author could not be fit on page. It will not
## be plotted.
## Warning in wordcloud(words = df_wc$speaker_occupation, freq = df_wc$n,
## min.freq = 1, : Marketer, success analyst could not be fit on page. It will
## not be plotted.
## Warning in wordcloud(words = df_wc$speaker_occupation, freq = df_wc$n,
## min.freq = 1, : Neuroscientist, philosopher could not be fit on page. It
## will not be plotted.
## Warning in wordcloud(words = df_wc$speaker_occupation, freq = df_wc$n,
## min.freq = 1, : Organizational psychologist could not be fit on page. It
## will not be plotted.
## Warning in wordcloud(words = df_wc$speaker_occupation, freq = df_wc$n,
## min.freq = 1, : Physicist could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = df_wc$speaker_occupation, freq = df_wc$n,
## min.freq = 1, : Psycho-economist could not be fit on page. It will not be
## plotted.
## Warning in wordcloud(words = df_wc$speaker_occupation, freq = df_wc$n,
## min.freq = 1, : Relationship therapist could not be fit on page. It will
## not be plotted.
## Warning in wordcloud(words = df_wc$speaker_occupation, freq = df_wc$n,
## min.freq = 1, : Serial entrepreneur could not be fit on page. It will not
## be plotted.
## Warning in wordcloud(words = df_wc$speaker_occupation, freq = df_wc$n,
## min.freq = 1, : Sound consultant could not be fit on page. It will not be
## plotted.
## Warning in wordcloud(words = df_wc$speaker_occupation, freq = df_wc$n,
## min.freq = 1, : Surgeon could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = df_wc$speaker_occupation, freq = df_wc$n,
## min.freq = 1, : Technologist could not be fit on page. It will not be
## plotted.
## Warning in wordcloud(words = df_wc$speaker_occupation, freq = df_wc$n,
## min.freq = 1, : Technologist, entrepreneur could not be fit on page. It
## will not be plotted.
## Warning in wordcloud(words = df_wc$speaker_occupation, freq = df_wc$n,
## min.freq = 1, : Theoretical physicist could not be fit on page. It will not
## be plotted.
## Warning in wordcloud(words = df_wc$speaker_occupation, freq = df_wc$n,
## min.freq = 1, : Vulnerability researcher could not be fit on page. It will
## not be plotted.
## Warning in wordcloud(words = df_wc$speaker_occupation, freq = df_wc$n,
## min.freq = 1, : Whistleblower could not be fit on page. It will not be
## plotted.
## Warning in wordcloud(words = df_wc$speaker_occupation, freq = df_wc$n,
## min.freq = 1, : Writer and filmmaker could not be fit on page. It will not
## be plotted.
## Warning in wordcloud(words = df_wc$speaker_occupation, freq = df_wc$n,
## min.freq = 1, : Aircraft engineer could not be fit on page. It will not be
## plotted.
## Warning in wordcloud(words = df_wc$speaker_occupation, freq = df_wc$n,
## min.freq = 1, : Anthropologist, ethnobotanist could not be fit on page. It
## will not be plotted.
## Warning in wordcloud(words = df_wc$speaker_occupation, freq = df_wc$n,
## min.freq = 1, : Clinical psychologist could not be fit on page. It will not
## be plotted.
## Warning in wordcloud(words = df_wc$speaker_occupation, freq = df_wc$n,
## min.freq = 1, : Comedian and Mental Health Activist could not be fit on
## page. It will not be plotted.
## Warning in wordcloud(words = df_wc$speaker_occupation, freq = df_wc$n,
## min.freq = 1, : Comedian, activist could not be fit on page. It will not be
## plotted.
## Warning in wordcloud(words = df_wc$speaker_occupation, freq = df_wc$n,
## min.freq = 1, : Femto-photographer could not be fit on page. It will not be
## plotted.
## Warning in wordcloud(words = df_wc$speaker_occupation, freq = df_wc$n,
## min.freq = 1, : Functional neurosurgeon could not be fit on page. It will
## not be plotted.
## Warning in wordcloud(words = df_wc$speaker_occupation, freq = df_wc$n,
## min.freq = 1, : Leadership educator could not be fit on page. It will not
## be plotted.
## Warning in wordcloud(words = df_wc$speaker_occupation, freq = df_wc$n,
## min.freq = 1, : Moral philosopher could not be fit on page. It will not be
## plotted.
## Warning in wordcloud(words = df_wc$speaker_occupation, freq = df_wc$n,
## min.freq = 1, : Neuroscientist, stand-up comic could not be fit on page. It
## will not be plotted.
## Warning in wordcloud(words = df_wc$speaker_occupation, freq = df_wc$n,
## min.freq = 1, : North Korean refugee could not be fit on page. It will not
## be plotted.
## Warning in wordcloud(words = df_wc$speaker_occupation, freq = df_wc$n,
## min.freq = 1, : Nuclear scientist could not be fit on page. It will not be
## plotted.
## Warning in wordcloud(words = df_wc$speaker_occupation, freq = df_wc$n,
## min.freq = 1, : Particle physicist could not be fit on page. It will not be
## plotted.
## Warning in wordcloud(words = df_wc$speaker_occupation, freq = df_wc$n,
## min.freq = 1, : Physician, author could not be fit on page. It will not be
## plotted.
## Warning in wordcloud(words = df_wc$speaker_occupation, freq = df_wc$n,
## min.freq = 1, : Product creator could not be fit on page. It will not be
## plotted.
## Warning in wordcloud(words = df_wc$speaker_occupation, freq = df_wc$n,
## min.freq = 1, : Radio researcher could not be fit on page. It will not be
## plotted.
## Warning in wordcloud(words = df_wc$speaker_occupation, freq = df_wc$n,
## min.freq = 1, : Social energy entrepreneur could not be fit on page. It
## will not be plotted.
## Warning in wordcloud(words = df_wc$speaker_occupation, freq = df_wc$n,
## min.freq = 1, : Stellar astronomer could not be fit on page. It will not be
## plotted.
# Same word cloud and top-10 occupation chart, for the "Worst" view category
generate_cloud_grph("Worst")
## Warning in wordcloud(words = df_wc$speaker_occupation, freq = df_wc$n,
## min.freq = 1, : Journalist could not be fit on page. It will not be
## plotted.
## Warning in wordcloud(words = df_wc$speaker_occupation, freq = df_wc$n,
## min.freq = 1, : Biologist could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = df_wc$speaker_occupation, freq = df_wc$n,
## min.freq = 1, : Engineer could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = df_wc$speaker_occupation, freq = df_wc$n,
## min.freq = 1, : Entrepreneur could not be fit on page. It will not be
## plotted.
## Warning in wordcloud(words = df_wc$speaker_occupation, freq = df_wc$n,
## min.freq = 1, : Musician could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = df_wc$speaker_occupation, freq = df_wc$n,
## min.freq = 1, : Physicist could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = df_wc$speaker_occupation, freq = df_wc$n,
## min.freq = 1, : Technologist could not be fit on page. It will not be
## plotted.
## Warning in wordcloud(words = df_wc$speaker_occupation, freq = df_wc$n,
## min.freq = 1, : Educator could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = df_wc$speaker_occupation, freq = df_wc$n,
## min.freq = 1, : Game designer could not be fit on page. It will not be
## plotted.
## Warning in wordcloud(words = df_wc$speaker_occupation, freq = df_wc$n,
## min.freq = 1, : Oceanographer could not be fit on page. It will not be
## plotted.
## Warning in wordcloud(words = df_wc$speaker_occupation, freq = df_wc$n,
## min.freq = 1, : Politician could not be fit on page. It will not be
## plotted.
## Warning in wordcloud(words = df_wc$speaker_occupation, freq = df_wc$n,
## min.freq = 1, : Sculptor could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = df_wc$speaker_occupation, freq = df_wc$n,
## min.freq = 1, : Singer-songwriter could not be fit on page. It will not be
## plotted.
## Warning in wordcloud(words = df_wc$speaker_occupation, freq = df_wc$n,
## min.freq = 1, : Social entrepreneur could not be fit on page. It will not
## be plotted.
## Warning in wordcloud(words = df_wc$speaker_occupation, freq = df_wc$n,
## min.freq = 1, : Tech visionary could not be fit on page. It will not be
## plotted.
## Warning in wordcloud(words = df_wc$speaker_occupation, freq = df_wc$n,
## min.freq = 1, : Activist could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = df_wc$speaker_occupation, freq = df_wc$n,
## min.freq = 1, : Arab sexuality expert could not be fit on page. It will not
## be plotted.
## Warning in wordcloud(words = df_wc$speaker_occupation, freq = df_wc$n,
## min.freq = 1, : Cartoonist could not be fit on page. It will not be
## plotted.
## Warning in wordcloud(words = df_wc$speaker_occupation, freq = df_wc$n,
## min.freq = 1, : Chemist could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = df_wc$speaker_occupation, freq = df_wc$n,
## min.freq = 1, : Chief Economist and Senior Vice President, World Bank could
## not be fit on page. It will not be plotted.
## Warning in wordcloud(words = df_wc$speaker_occupation, freq = df_wc$n,
## min.freq = 1, : Computer theorist could not be fit on page. It will not be
## plotted.
## Warning in wordcloud(words = df_wc$speaker_occupation, freq = df_wc$n,
## min.freq = 1, : Conservationist could not be fit on page. It will not be
## plotted.
## Warning in wordcloud(words = df_wc$speaker_occupation, freq = df_wc$n,
## min.freq = 1, : Design curator could not be fit on page. It will not be
## plotted.
## Warning in wordcloud(words = df_wc$speaker_occupation, freq = df_wc$n,
## min.freq = 1, : DNA origamist could not be fit on page. It will not be
## plotted.
## Warning in wordcloud(words = df_wc$speaker_occupation, freq = df_wc$n,
## min.freq = 1, : Electronic music pioneer could not be fit on page. It will
## not be plotted.
## Warning in wordcloud(words = df_wc$speaker_occupation, freq = df_wc$n,
## min.freq = 1, : Environmentalist, futurist could not be fit on page. It
## will not be plotted.
## Warning in wordcloud(words = df_wc$speaker_occupation, freq = df_wc$n,
## min.freq = 1, : Evolutionary biologist could not be fit on page. It will
## not be plotted.
## Warning in wordcloud(words = df_wc$speaker_occupation, freq = df_wc$n,
## min.freq = 1, : Graphic designer could not be fit on page. It will not be
## plotted.
## Warning in wordcloud(words = df_wc$speaker_occupation, freq = df_wc$n,
## min.freq = 1, : Guitarist could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = df_wc$speaker_occupation, freq = df_wc$n,
## min.freq = 1, : Legal activist could not be fit on page. It will not be
## plotted.
## Warning in wordcloud(words = df_wc$speaker_occupation, freq = df_wc$n,
## min.freq = 1, : Marine ecologist could not be fit on page. It will not be
## plotted.
## Warning in wordcloud(words = df_wc$speaker_occupation, freq = df_wc$n,
## min.freq = 1, : Mathematician could not be fit on page. It will not be
## plotted.
## Warning in wordcloud(words = df_wc$speaker_occupation, freq = df_wc$n,
## min.freq = 1, : Musician, activist could not be fit on page. It will not be
## plotted.
## Warning in wordcloud(words = df_wc$speaker_occupation, freq = df_wc$n,
## min.freq = 1, : Neuroscientist could not be fit on page. It will not be
## plotted.
## Warning in wordcloud(words = df_wc$speaker_occupation, freq = df_wc$n,
## min.freq = 1, : Photojournalist could not be fit on page. It will not be
## plotted.
## Warning in wordcloud(words = df_wc$speaker_occupation, freq = df_wc$n,
## min.freq = 1, : Professor could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = df_wc$speaker_occupation, freq = df_wc$n,
## min.freq = 1, : Public intellectual could not be fit on page. It will not
## be plotted.
## Warning in wordcloud(words = df_wc$speaker_occupation, freq = df_wc$n,
## min.freq = 1, : Science writer could not be fit on page. It will not be
## plotted.
## Warning in wordcloud(words = df_wc$speaker_occupation, freq = df_wc$n,
## min.freq = 1, : Social commentator could not be fit on page. It will not be
## plotted.
## Warning in wordcloud(words = df_wc$speaker_occupation, freq = df_wc$n,
## min.freq = 1, : Soprano could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = df_wc$speaker_occupation, freq = df_wc$n,
## min.freq = 1, : Space activist could not be fit on page. It will not be
## plotted.
## Warning in wordcloud(words = df_wc$speaker_occupation, freq = df_wc$n,
## min.freq = 1, : TED Curator could not be fit on page. It will not be
## plotted.
## Warning in wordcloud(words = df_wc$speaker_occupation, freq = df_wc$n,
## min.freq = 1, : Tree researcher could not be fit on page. It will not be
## plotted.
## Warning in wordcloud(words = df_wc$speaker_occupation, freq = df_wc$n,
## min.freq = 1, : Venture capitalist could not be fit on page. It will not be
## plotted.
## Warning in wordcloud(words = df_wc$speaker_occupation, freq = df_wc$n,
## min.freq = 1, : Child protection leader, activist, author could not be fit
## on page. It will not be plotted.
## Warning in wordcloud(words = df_wc$speaker_occupation, freq = df_wc$n,
## min.freq = 1, : Activist and social entrepreneur could not be fit on page.
## It will not be plotted.
## Warning in wordcloud(words = df_wc$speaker_occupation, freq = df_wc$n,
## min.freq = 1, : AI pioneer could not be fit on page. It will not be
## plotted.
## Warning in wordcloud(words = df_wc$speaker_occupation, freq = df_wc$n,
## min.freq = 1, : Artificial life researcher could not be fit on page. It
## will not be plotted.
## Warning in wordcloud(words = df_wc$speaker_occupation, freq = df_wc$n,
## min.freq = 1, : Atmospheric chemist could not be fit on page. It will not
## be plotted.
## Warning in wordcloud(words = df_wc$speaker_occupation, freq = df_wc$n,
## min.freq = 1, : Author, thinker could not be fit on page. It will not be
## plotted.
## Warning in wordcloud(words = df_wc$speaker_occupation, freq = df_wc$n,
## min.freq = 1, : Bioengineer could not be fit on page. It will not be
## plotted.
## Warning in wordcloud(words = df_wc$speaker_occupation, freq = df_wc$n,
## min.freq = 1, : Botanist and activist could not be fit on page. It will not
## be plotted.
## Warning in wordcloud(words = df_wc$speaker_occupation, freq = df_wc$n,
## min.freq = 1, : British Prime Minister could not be fit on page. It will
## not be plotted.
## Warning in wordcloud(words = df_wc$speaker_occupation, freq = df_wc$n,
## min.freq = 1, : Clinical professor of law could not be fit on page. It will
## not be plotted.
## Warning in wordcloud(words = df_wc$speaker_occupation, freq = df_wc$n,
## min.freq = 1, : Clinical researcher could not be fit on page. It will not
## be plotted.
## Warning in wordcloud(words = df_wc$speaker_occupation, freq = df_wc$n,
## min.freq = 1, : Complexity theorist could not be fit on page. It will not
## be plotted.
## Warning in wordcloud(words = df_wc$speaker_occupation, freq = df_wc$n,
## min.freq = 1, : Composer, conductor could not be fit on page. It will not
## be plotted.
## Warning in wordcloud(words = df_wc$speaker_occupation, freq = df_wc$n,
## min.freq = 1, : Digital money and identity consultant could not be fit on
## page. It will not be plotted.
## Warning in wordcloud(words = df_wc$speaker_occupation, freq = df_wc$n,
## min.freq = 1, : Endurance runner could not be fit on page. It will not be
## plotted.
## Warning in wordcloud(words = df_wc$speaker_occupation, freq = df_wc$n,
## min.freq = 1, : Fisheries biologist could not be fit on page. It will not
## be plotted.
## Warning in wordcloud(words = df_wc$speaker_occupation, freq = df_wc$n,
## min.freq = 1, : Founder, GrameenPhone could not be fit on page. It will not
## be plotted.
## Warning in wordcloud(words = df_wc$speaker_occupation, freq = df_wc$n,
## min.freq = 1, : High-wire artist could not be fit on page. It will not be
## plotted.
## Warning in wordcloud(words = df_wc$speaker_occupation, freq = df_wc$n,
## min.freq = 1, : Information security specialist could not be fit on page.
## It will not be plotted.
## Warning in wordcloud(words = df_wc$speaker_occupation, freq = df_wc$n,
## min.freq = 1, : Kenyan MP could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = df_wc$speaker_occupation, freq = df_wc$n,
## min.freq = 1, : Microinsurer could not be fit on page. It will not be
## plotted.
## Warning in wordcloud(words = df_wc$speaker_occupation, freq = df_wc$n,
## min.freq = 1, : Movement artist could not be fit on page. It will not be
## plotted.
## Warning in wordcloud(words = df_wc$speaker_occupation, freq = df_wc$n,
## min.freq = 1, : Natural resources expert could not be fit on page. It will
## not be plotted.
## Warning in wordcloud(words = df_wc$speaker_occupation, freq = df_wc$n,
## min.freq = 1, : Ornithologist could not be fit on page. It will not be
## plotted.
## Warning in wordcloud(words = df_wc$speaker_occupation, freq = df_wc$n,
## min.freq = 1, : Paleoanthropologist could not be fit on page. It will not
## be plotted.
## Warning in wordcloud(words = df_wc$speaker_occupation, freq = df_wc$n,
## min.freq = 1, : Performance poet, multimedia artist could not be fit on
## page. It will not be plotted.
## Warning in wordcloud(words = df_wc$speaker_occupation, freq = df_wc$n,
## min.freq = 1, : Philosopher could not be fit on page. It will not be
## plotted.
## Warning in wordcloud(words = df_wc$speaker_occupation, freq = df_wc$n,
## min.freq = 1, : Physicist, education activist could not be fit on page. It
## will not be plotted.
## Warning in wordcloud(words = df_wc$speaker_occupation, freq = df_wc$n,
## min.freq = 1, : Poet, playwright could not be fit on page. It will not be
## plotted.
## Warning in wordcloud(words = df_wc$speaker_occupation, freq = df_wc$n,
## min.freq = 1, : Research scientist could not be fit on page. It will not be
## plotted.
## Warning in wordcloud(words = df_wc$speaker_occupation, freq = df_wc$n,
## min.freq = 1, : Science journalist could not be fit on page. It will not be
## plotted.
## Warning in wordcloud(words = df_wc$speaker_occupation, freq = df_wc$n,
## min.freq = 1, : Scientific visualization expert could not be fit on page.
## It will not be plotted.
## Warning in wordcloud(words = df_wc$speaker_occupation, freq = df_wc$n,
## min.freq = 1, : Singer, performance artist could not be fit on page. It
## will not be plotted.
## Warning in wordcloud(words = df_wc$speaker_occupation, freq = df_wc$n,
## min.freq = 1, : Social Justice Activist could not be fit on page. It will
## not be plotted.
## Warning in wordcloud(words = df_wc$speaker_occupation, freq = df_wc$n,
## min.freq = 1, : Visualizer could not be fit on page. It will not be
## plotted.
## Warning in wordcloud(words = df_wc$speaker_occupation, freq = df_wc$n,
## min.freq = 1, : Zoologist could not be fit on page. It will not be plotted.
## ARE THE LONGER TALKS VIEWED LESS?
# Interactive box plot of talk duration, one box per view category
plot_ly(ted_final, y = ~duration, color = ~view_category, type = "box")
# Correlation between views and duration across all talks
with(ted_final, cor(views, duration))
## [1] 0.04874043
